This is an R Markdown file for Exploratory analysis of data analyses in the protocol of a meta research which aims at surveying sample characteristics of big team science in psychology.
In this script, we will use data from PSA 001 project Jones et al., 2021, Nature Human Behavior, and partial articles published in psychological science in 2014 Red et al., 2018, Proceedings of the National Academy of Sciences, and Ruggeri et al. (2022)Nature Human Behavior to exemplify the analyses that we are going to use in our this project.
library(tidyverse) # ggplot, dplyr, %>%, and friends
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(brms) # Bayesian modeling through Stan
## Loading required package: Rcpp
## Loading 'brms' package (version 2.20.4). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
##
## The following object is masked from 'package:stats':
##
## ar
library(marginaleffects) # Calculate marginal effects for regression models
library(tidybayes) # Manipulate Stan objects in a tidy way
##
## Attaching package: 'tidybayes'
##
## The following objects are masked from 'package:brms':
##
## dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(patchwork) # Combine ggplot objects
library(ggrepel) # Automatically position labels
library(scales) # Format numbers in nice ways
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(collapse)
## collapse 2.0.9, see ?`collapse-package` or ?`collapse-documentation`
##
## Attaching package: 'collapse'
##
## The following object is masked from 'package:lubridate':
##
## is.Date
##
## The following object is masked from 'package:tidyr':
##
## replace_na
##
## The following object is masked from 'package:stats':
##
## D
library(broom) # Convert model objects to data frames
library(broom.mixed) # Convert brms model objects to data frames
library(extraDistr) # Use extra distributions like dprop()
##
## Attaching package: 'extraDistr'
##
## The following objects are masked from 'package:brms':
##
## ddirichlet, dfrechet, pfrechet, qfrechet, rdirichlet, rfrechet
##
## The following object is masked from 'package:purrr':
##
## rdunif
library(ggdist) # Special geoms for posterior distributions
##
## Attaching package: 'ggdist'
##
## The following objects are masked from 'package:brms':
##
## dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(gghalves) # Special half geoms
library(ggbeeswarm) # Special distribution-shaped point jittering
library(modelsummary) # Create side-by-side regression tables
library(ggthemes)
set.seed(1234) # Make everything reproducible
# Define the goodness-of-fit stats to include in modelsummary()
gof_stuff <- tribble(
~raw, ~clean, ~fmt,
"nobs", "N", 0,
"r.squared", "R²", 3
)
# Custom ggplot theme to make pretty plots
# Get the font at https://fonts.google.com/specimen/Barlow+Semi+Condensed
theme_clean <- function() {
theme_minimal(base_family = "Barlow Semi Condensed") +
theme(panel.grid.minor = element_blank(),
plot.title = element_text(family = "BarlowSemiCondensed-Bold"),
axis.title = element_text(family = "BarlowSemiCondensed-Medium"),
strip.text = element_text(family = "BarlowSemiCondensed-Bold",
size = rel(1), hjust = 0),
strip.background = element_rect(fill = "grey80", color = NA))
}
# Make labels use Barlow by default
update_geom_defaults("label_repel", list(family = "Barlow Semi Condensed"))
# Format things as percentage points
label_pp <- label_number(accuracy = 1, scale = 100,
suffix = " pp.", style_negative = "minus")
label_pp_tiny <- label_number(accuracy = 0.01, scale = 100,
suffix = " pp.", style_negative = "minus")
######1.Country-level relationship between proportion of sample and socioeconomic/cultural factors for traditional studies (using PS2014 as an example) and big team science (using PSA 001 as an example)
####_1.1GDP_per_capita
gdpdata <- read_csv("gdpdata.csv")
## Rows: 185 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): country, country_map, continent
## dbl (7): GDP_per_capita, bts, total_bts, percentage_sample, ps, total_ps, ps...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gdpdata
## # A tibble: 185 × 10
## country country_map continent GDP_per_capita bts total_bts
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 AND AD Europe 42.0 0 11484
## 2 ARE AE Asia 53.8 80 11484
## 3 ATG AG North America 18.7 0 11484
## 4 ARM AM Asia 7.01 0 11484
## 5 AGO AO Africa 3.00 0 11484
## 6 ARG AR South America 13.7 91 11484
## 7 AUT AT Europe 52.1 236 11484
## 8 AUS AU Oceania 64.5 885 11484
## 9 AZE AZ Asia 7.74 0 11484
## 10 BIH BA Europe 7.59 0 11484
## # ℹ 175 more rows
## # ℹ 4 more variables: percentage_sample <dbl>, ps <dbl>, total_ps <dbl>,
## # ps2014 <dbl>
##BTS
gdp_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ GDP_per_capita,
phi ~ GDP_per_capita,
zi ~ GDP_per_capita),
data = gdpdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "gdp_model_beta_zi_bts"
)
tidy(gdp_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(gdp_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -1.35 4.85 -4.48 7.04
## 2 fixed cond phi_(Intercept) -1.75 12.2 -23.0 5.94
## 3 fixed zi (Intercept) 3.75 2.64 1.67 8.30
## 4 fixed cond GDP_per_capita -0.0666 0.134 -0.299 0.0243
## 5 fixed cond phi_GDP_per_capita 0.311 0.616 -0.0622 1.38
## 6 fixed zi GDP_per_capita -0.176 0.203 -0.527 -0.0335
ame_beta_zi_gdp_bts <- gdp_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "GDP_per_capita") %>%
marginaleffects::posterior_draws()
ame_beta_zi_gdp_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2.91 0.950 4.71 0.95 median hdi
## 2 2.91 37.4 46.8 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_gdp_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 13.73 19.09 1.58 46.76 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot2::ggplot(ame_beta_zi_gdp_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of GDP per capita", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
gdp_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ GDP_per_capita,
phi ~ GDP_per_capita,
zi ~ GDP_per_capita),
data = gdpdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 4, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "gdp_model_beta_zi_ps2014"
)
tidy(gdp_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(gdp_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.29 0.623 -4.52 -2.09
## 2 fixed cond phi_(Intercept) 0.768 0.706 -0.683 2.06
## 3 fixed zi (Intercept) 3.33 0.778 1.93 4.99
## 4 fixed cond GDP_per_capita -0.00533 0.0122 -0.0286 0.0196
## 5 fixed cond phi_GDP_per_capita 0.00866 0.0134 -0.0176 0.0345
## 6 fixed zi GDP_per_capita -0.294 0.102 -0.536 -0.134
ame_beta_zi_gdp_ps2014 <- gdp_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "GDP_per_capita") %>%
marginaleffects::posterior_draws()
ame_beta_zi_gdp_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 10.2 3.64 22.4 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_gdp_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 11.42 5.43 5.16 21.8 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_gdp_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of GDP per capita", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##1.2_R&D_investment
rddata <- read_csv("rddata.csv")
## Rows: 70 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): country, country_map, continent
## dbl (7): R_D, bts, total_bts, percentage_sample, ps, total_ps, ps2014
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rddata
## # A tibble: 70 × 10
## country R_D country_map continent bts total_bts percentage_sample ps
## <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 United_A… 1.3 AE Asia 80 11484 0.00697 0
## 2 Armenia 0.19 AM Asia 0 11484 0 0
## 3 Austria 3.17 AT Europe 236 11484 0.0206 33
## 4 Azerbaij… 0.18 AZ Asia 0 11484 0 0
## 5 Bosnia H… 0.2 BA Europe 0 11484 0 33
## 6 Belgium 2.82 BE Europe 86 11484 0.00749 125
## 7 Bulgaria 0.77 BG Europe 0 11484 0 33
## 8 Burundi 0.21 BI Africa 0 11484 0 0
## 9 Brunei_D… 0.28 BN Asia 0 11484 0 0
## 10 Belarus 0.61 BY Europe 0 11484 0 33
## # ℹ 60 more rows
## # ℹ 2 more variables: total_ps <dbl>, ps2014 <dbl>
##BTS
rd_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ R_D,
phi ~ R_D,
zi ~ R_D),
data = rddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "rd_model_beta_zi_bts"
)
tidy(rd_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(rd_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.89 0.393 -4.54 -2.98
## 2 fixed cond phi_(Intercept) 4.93 0.730 3.27 6.10
## 3 fixed zi (Intercept) 2.50 0.612 1.36 3.81
## 4 fixed cond R_D 0.211 0.241 -0.286 0.655
## 5 fixed cond phi_R_D -0.958 0.325 -1.56 -0.274
## 6 fixed zi R_D -2.01 0.579 -3.29 -1.02
ame_beta_zi_rd_bts <- rd_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "R_D") %>%
marginaleffects::posterior_draws()
ame_beta_zi_rd_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 106. 40.0 239. 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_rd_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 120.32 58.03 57.07 235.53 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_rd_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of R&D investment", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
rd_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ R_D,
phi ~ R_D,
zi ~ R_D),
data = rddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "rd_model_beta_zi_ps2014"
)
tidy(rd_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(rd_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -6.36 0.125 -6.62 -6.12
## 2 fixed cond phi_(Intercept) 11.2 0.866 9.56 12.9
## 3 fixed zi (Intercept) 2.09 0.601 1.00 3.35
## 4 fixed cond R_D 0.647 0.172 0.327 1.00
## 5 fixed cond phi_R_D -3.27 0.367 -4.04 -2.62
## 6 fixed zi R_D -3.17 0.837 -5.02 -1.69
ame_beta_zi_rd_ps2014 <- rd_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "R_D") %>%
marginaleffects::posterior_draws()
ame_beta_zi_rd_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 50.6 19.5 126. 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_rd_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 59.29 32.76 26.1 122.61 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_rd_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of R&D investment", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##1.3_Number of universities per 100000 people
universitydata <- read_csv("universitydata.csv")
## Rows: 217 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): country1, country_map, continent
## dbl (9): number of universities, pop, number_of_universities_per_capita, bts...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
universitydata
## # A tibble: 217 × 12
## country1 country_map continent number of universitie…¹ pop
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Andorra AD Europe 3 7.90e1
## 2 United Arab Emirates AE Asia 72 9.37e3
## 3 Afghanistan AF Asia 96 4.01e4
## 4 Antigua and Barbuda AG North America 3 9.32e1
## 5 Anguilla AI North America 4 1.58e1
## 6 Albania AL Europe 44 2.85e3
## 7 Armenia AM Asia 51 2.79e3
## 8 Angola AO Africa 23 3.45e4
## 9 Argentina AR South America 146 4.53e4
## 10 American Samoa AS Oceania 1 4.51e1
## # ℹ 207 more rows
## # ℹ abbreviated name: ¹`number of universities`
## # ℹ 7 more variables: number_of_universities_per_capita <dbl>, bts <dbl>,
## # total_bts <dbl>, percentage_sample <dbl>, ps <dbl>, total_ps <dbl>,
## # ps2014 <dbl>
##BTS
university_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ number_of_universities_per_capita,
phi ~ number_of_universities_per_capita,
zi ~ number_of_universities_per_capita),
data = universitydata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "university_model_beta_zi_bts"
)
tidy(university_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(university_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -2.41e-1 0.144 -3.85e-1 -0.0742
## 2 fixed cond phi_(Intercept) -4.48e+0 0.783 -5.26e+0 -3.58
## 3 fixed zi (Intercept) -2.46e+1 8.33 -3.29e+1 -16.0
## 4 fixed cond number_of_universities… 8.99e-3 0.000239 8.39e-3 0.00940
## 5 fixed cond phi_number_of_universi… 1.22e-1 0.0166 1.05e-1 0.142
## 6 fixed zi number_of_universities… 7.20e-1 0.261 4.52e-1 0.980
ame_beta_zi_university_bts <- university_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "number_of_universities_per_capita") %>%
marginaleffects::posterior_draws()
ame_beta_zi_university_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 9.00 4.53 7.91 0.95 median hdi
## 2 9.00 9.83 10.7 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_university_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 8.49 2.29 5.13 10.7 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_university_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Number of universities per 100000 people", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
university_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ number_of_universities_per_capita,
phi ~ number_of_universities_per_capita,
zi ~ number_of_universities_per_capita),
data = universitydata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "university_model_beta_zi_ps2014"
)
tidy(university_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(university_model_beta_zi_ps2014, effects = "fixed"):
## some parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) 0.846 1.09 -0.432 1.93
## 2 fixed cond phi_(Intercept) -1.86 0.292 -2.36 -1.58
## 3 fixed zi (Intercept) 1.07 0.0624 0.973 1.12
## 4 fixed cond number_of_universities… -0.0239 0.0261 -0.0499 0.00786
## 5 fixed cond phi_number_of_universi… 0.0463 0.0306 0.0124 0.0769
## 6 fixed zi number_of_universities… -0.0220 0.0211 -0.0431 0.000212
ame_beta_zi_university_ps2014 <- university_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "number_of_universities_per_capita") %>%
marginaleffects::posterior_draws()
ame_beta_zi_university_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 3 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 34.9 -2.33 1.79 0.95 median hdi
## 2 34.9 3.52 8.91 0.95 median hdi
## 3 34.9 47.9 59.8 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_university_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 31.3 28.63 -0.65 59.83 3.56 0.78
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_university_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Number of universities per 100000 people", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##number_of_psychology_researchers_per_capita
researcherdata <- read_csv("researcherdata.csv")
## Rows: 63 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): country1, country_map, continent
## dbl (8): pop, number_of_psychology_researchers_per_capita, bts, total_bts, p...
## num (1): National Members
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
researcherdata
## # A tibble: 63 × 12
## country1 `National Members` country_map continent pop
## <chr> <dbl> <chr> <chr> <dbl>
## 1 Albania 950 AL Europe 2855.
## 2 Argentina 550 AR South America 45277.
## 3 Australia 24000 AU Oceania 25921.
## 4 Austria 5000 AT Europe 8922.
## 5 Bangladesh 1000 BD Asia 169356.
## 6 Belgium 300 BE Europe 11611.
## 7 Botswana 98 BW Africa 2588.
## 8 Bulgaria 700 BG Europe 6886.
## 9 Canada 6850 CA North America 38155.
## 10 Chile 3218 CL South America 19493.
## # ℹ 53 more rows
## # ℹ 7 more variables: number_of_psychology_researchers_per_capita <dbl>,
## # bts <dbl>, total_bts <dbl>, percentage_sample <dbl>, ps <dbl>,
## # total_ps <dbl>, ps2014 <dbl>
##BTS
researcher_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ number_of_psychology_researchers_per_capita,
phi ~ number_of_psychology_researchers_per_capita,
zi ~ number_of_psychology_researchers_per_capita),
data = researcherdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "researcher_model_beta_zi_bts"
)
tidy(researcher_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(researcher_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -1.94 2.39 -3.78 2.18
## 2 fixed cond phi_(Intercept) 1.25 3.23 -4.29 4.02
## 3 fixed zi (Intercept) 4.43 6.22 0.203 15.2
## 4 fixed cond number_of_psychology_r… -0.00263 0.00611 -0.0123 0.00710
## 5 fixed cond phi_number_of_psycholo… 0.0498 0.0776 -0.0107 0.184
## 6 fixed zi number_of_psychology_r… -0.152 0.219 -0.530 -0.00929
ame_beta_zi_researcher_bts <- researcher_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "number_of_psychology_researchers_per_capita") %>%
marginaleffects::posterior_draws()
ame_beta_zi_researcher_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2.52 0.541 4.06 0.95 median hdi
## 2 2.52 35.2 44.0 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_researcher_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 12.7 18.08 1.06 43.98 999 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_researcher_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of number of psychology researchers per capita", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
researcher_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ number_of_psychology_researchers_per_capita,
phi ~ number_of_psychology_researchers_per_capita,
zi ~ number_of_psychology_researchers_per_capita),
data = researcherdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "researcher_model_beta_zi_ps2014"
)
tidy(researcher_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(researcher_model_beta_zi_ps2014, effects = "fixed"):
## some parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -0.847 2.61 -3.35e+0 3.59
## 2 fixed cond phi_(Intercept) -3.85 6.67 -1.54e+1 1.09
## 3 fixed zi (Intercept) 3.50 6.85 -3.62e+0 15.2
## 4 fixed cond number_of_psychology_r… -0.0314 0.0168 -5.86e-2 -0.00867
## 5 fixed cond phi_number_of_psycholo… 0.164 0.223 8.80e-3 0.549
## 6 fixed zi number_of_psychology_r… -0.164 0.216 -5.30e-1 0.0191
ame_beta_zi_researcher_ps2014 <- researcher_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "number_of_psychology_researchers_per_capita") %>%
marginaleffects::posterior_draws()
ame_beta_zi_researcher_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 3 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -0.319 -22.9 16.9 0.95 median hdi
## 2 -0.319 19.8 20.6 0.95 median hdi
## 3 -0.319 30.8 35.7 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_researcher_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 5.98 18.06 -16.66 33.2 0.97 0.49
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_researcher_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of number of psychology researchers per capita", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##_1.5_average_years_of_schooling
edudata <- read_csv("Education_data.csv")
## Rows: 146 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Entity, country, country_map, continent
## dbl (8): Year, average_years_of_schooling, bts, total_bts, percentage_sample...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
edudata
## # A tibble: 146 × 12
## Entity Year average_years_of_sch…¹ country country_map continent bts
## <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 United Arab… 2020 9.57 ARE AE Asia 80
## 2 Afghanistan 2020 5.69 AFG AF Asia 0
## 3 Albania 2020 10.3 ALB AL Europe 0
## 4 Armenia 2020 10.5 ARM AM Asia 0
## 5 Argentina 2020 9.86 ARG AR South Am… 91
## 6 Austria 2020 10.4 AUT AT Europe 236
## 7 Australia 2020 12.9 AUS AU Oceania 885
## 8 Barbados 2020 9.67 BRB BB North Am… 0
## 9 Bangladesh 2020 7.23 BGD BD Asia 0
## 10 Belgium 2020 11.6 BEL BE Europe 86
## # ℹ 136 more rows
## # ℹ abbreviated name: ¹average_years_of_schooling
## # ℹ 5 more variables: total_bts <dbl>, percentage_sample <dbl>, ps <dbl>,
## # total_ps <dbl>, ps2014 <dbl>
edu_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ average_years_of_schooling,
phi ~ average_years_of_schooling,
zi ~ average_years_of_schooling),
data = edudata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "edu_model_beta_zi_bts"
)
tidy(edu_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(edu_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -4.36 0.870 -6.17 -2.79
## 2 fixed cond phi_(Intercept) 8.56 1.68 5.19 11.8
## 3 fixed zi (Intercept) 5.59 1.21 3.50 8.16
## 4 fixed cond average_years_of_schoo… 0.0558 0.0872 -0.107 0.229
## 5 fixed cond phi_average_years_of_s… -0.417 0.149 -0.710 -0.132
## 6 fixed zi average_years_of_schoo… -0.467 0.117 -0.709 -0.262
ame_beta_zi_edu_bts <- edu_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "average_years_of_schooling") %>%
marginaleffects::posterior_draws()
ame_beta_zi_edu_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 23.6 8.50 44.0 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_edu_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 24.82 9.5 11.69 41.89 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_edu_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Average years of schooling", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
edu_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ average_years_of_schooling,
phi ~ average_years_of_schooling,
zi ~ average_years_of_schooling),
data = edudata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "edu_model_beta_zi_ps2014"
)
tidy(edu_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(edu_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -7.00e- 1 2.56e+ 0 -5.69e+ 0 4.25e+ 0
## 2 fixed cond phi_(Intercept) -6.61e+ 0 2.83e+ 0 -1.20e+ 1 -9.63e- 1
## 3 fixed zi (Intercept) 1.31e+12 2.81e+12 5.28e+ 8 1.03e+13
## 4 fixed cond average_years_of_sch… -2.25e- 1 2.14e- 1 -6.42e- 1 1.94e- 1
## 5 fixed cond phi_average_years_of… 6.37e- 1 2.33e- 1 1.70e- 1 1.08e+ 0
## 6 fixed zi average_years_of_sch… -1.40e+11 3.00e+11 -1.10e+12 -5.65e+ 7
ame_beta_zi_edu_ps2014 <- edu_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "average_years_of_schooling") %>%
marginaleffects::posterior_draws()
ame_beta_zi_edu_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 23.6 -27.0 89.3 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_edu_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 27.49 30.5 -13.5 80.49 6.37 0.86
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_edu_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Average years of schooling", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##1.6_Urbanization
urbandata <- read_csv("Urbanization_data.csv")
## Rows: 212 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Country Code, country_map, continent
## dbl (7): Urbanization, bts, total_bts, percentage_sample, ps, total_ps, ps2014
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
urbandata
## # A tibble: 212 × 10
## `Country Code` Urbanization country_map continent bts total_bts
## <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 AND 87.8 AD Europe 0 11484
## 2 ARE 87.5 AE Asia 80 11484
## 3 AFG 26.6 AF Asia 0 11484
## 4 ATG 24.3 AG North America 0 11484
## 5 ALB 63.8 AL Europe 0 11484
## 6 ARM 63.6 AM Asia 0 11484
## 7 AGO 68.1 AO Africa 0 11484
## 8 ARG 92.3 AR South America 91 11484
## 9 ASM 87.2 AS Oceania 0 11484
## 10 AUT 59.3 AT Europe 236 11484
## # ℹ 202 more rows
## # ℹ 4 more variables: percentage_sample <dbl>, ps <dbl>, total_ps <dbl>,
## # ps2014 <dbl>
##BTS
urban_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ Urbanization,
phi ~ Urbanization,
zi ~ Urbanization),
data = urbandata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "urban_model_beta_zi_bts")
tidy(urban_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(urban_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.48 1.13 -5.47 -0.645
## 2 fixed cond phi_(Intercept) 30.3 46.7 -4.48 111.
## 3 fixed zi (Intercept) -15.5 33.4 -73.4 5.34
## 4 fixed cond Urbanization 0.0154 0.0290 -0.0350 0.0596
## 5 fixed cond phi_Urbanization -0.442 0.770 -1.77 0.0967
## 6 fixed zi Urbanization 0.276 0.540 -0.0570 1.21
ame_beta_zi_urban_bts <- urban_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "Urbanization") %>%
marginaleffects::posterior_draws()
ame_beta_zi_urban_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 1.37 -16.9 -14.2 0.95 median hdi
## 2 1.37 -1.62 4.32 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_urban_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 -2.79 7.68 -15.84 3.29 2.16 0.68
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_urban_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Urbanization", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
urban_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ Urbanization,
phi ~ Urbanization,
zi ~ Urbanization),
data = urbandata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "urban_model_beta_zi_ps2014"
)
tidy(urban_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(urban_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -4.21 2.93 -7.62 1.27
## 2 fixed cond phi_(Intercept) 29.4 47.0 -3.30 111.
## 3 fixed zi (Intercept) -14.2 34.3 -73.5 8.82
## 4 fixed cond Urbanization 0.0252 0.0691 -0.0712 0.133
## 5 fixed cond phi_Urbanization -0.452 0.762 -1.77 0.0631
## 6 fixed zi Urbanization 0.235 0.565 -0.158 1.21
ame_beta_zi_urban_ps2014 <- urban_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "Urbanization") %>%
marginaleffects::posterior_draws()
ame_beta_zi_urban_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 3.96 -4.63 14.3 0.95 median hdi
## 2 3.96 14.6 15.0 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_urban_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 5.61 6 -2.07 12.96 4.63 0.82
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_urban_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Urbanization", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##1.7_Globalization
gidata <- read_csv("gidata.csv")
## Rows: 191 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): country, country3, country_map, continent
## dbl (8): year, GI, bts, total_bts, percentage_sample, ps, total_ps, ps2014
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gidata
## # A tibble: 191 × 12
## country year GI country3 country_map continent bts total_bts
## <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 United Arab Emira… 2020 76 ARE AE Asia 80 11484
## 2 Afghanistan 2020 38 AFG AF Asia 0 11484
## 3 Antigua and Barbu… 2020 55 ATG AG North Am… 0 11484
## 4 Albania 2020 64 ALB AL Europe 0 11484
## 5 Armenia 2020 67 ARM AM Asia 0 11484
## 6 Angola 2020 43 AGO AO Africa 0 11484
## 7 Argentina 2020 69 ARG AR South Am… 91 11484
## 8 Austria 2020 88 AUT AT Europe 236 11484
## 9 Australia 2020 80 AUS AU Oceania 885 11484
## 10 Aruba 2020 45 ABW AW North Am… 0 11484
## # ℹ 181 more rows
## # ℹ 4 more variables: percentage_sample <dbl>, ps <dbl>, total_ps <dbl>,
## # ps2014 <dbl>
##BTS
gi_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ GI,
phi ~ GI,
zi ~ GI),
data = gidata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "gi_model_beta_zi_bts"
)
tidy(gi_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(gi_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -2.23 2.29 -6.15 2.60
## 2 fixed cond phi_(Intercept) 0.592 4.53 -8.58 8.53
## 3 fixed zi (Intercept) 9.42 1.46 6.67 12.3
## 4 fixed cond GI -0.0158 0.0281 -0.0745 0.0332
## 5 fixed cond phi_GI 0.0373 0.0559 -0.0623 0.151
## 6 fixed zi GI -0.121 0.0200 -0.161 -0.0837
ame_beta_zi_gi_bts <- gi_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "GI") %>%
marginaleffects::posterior_draws()
ame_beta_zi_gi_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 3.96 1.54 6.45 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_gi_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 4.06 1.23 2.38 6.17 284.71 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_gi_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Globalization", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
gi_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ GI,
phi ~ GI,
zi ~ GI),
data = gidata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "gi_model_beta_zi_ps2014"
)
tidy(gi_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(gi_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -0.235 2.32 -4.89 4.12
## 2 fixed cond phi_(Intercept) -9.49 2.48 -14.2 -4.55
## 3 fixed zi (Intercept) 7.70 3.62 0.235 14.5
## 4 fixed cond GI -0.0464 0.0287 -0.100 0.0123
## 5 fixed cond phi_GI 0.147 0.0323 0.0806 0.208
## 6 fixed zi GI -0.130 0.0550 -0.262 -0.0372
ame_beta_zi_gi_ps2014 <- gi_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "GI") %>%
marginaleffects::posterior_draws()
ame_beta_zi_gi_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2.86 -7.22 8.74 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_gi_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 1.94 4.34 -6.05 6.84 3.76 0.79
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_gi_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Globalization", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##1.8_Internet penetration rate
networkdata <- read_csv("networkdata.csv")
## Rows: 179 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): country_map, continent
## dbl (7): Network_2021, bts, total_bts, percentage_sample, ps, total_ps, ps2014
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
networkdata
## # A tibble: 179 × 9
## country_map Network_2021 continent bts total_bts percentage_sample ps
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AD 93.9 Europe 0 11484 0 33
## 2 AE 100 Asia 80 11484 0.00697 0
## 3 AG 95.7 North Ameri… 0 11484 0 0
## 4 AL 79.3 Europe 0 11484 0 33
## 5 AO 32.6 Africa 0 11484 0 0
## 6 AR 87.2 South Ameri… 91 11484 0.00793 0
## 7 AT 92.5 Europe 236 11484 0.0206 33
## 8 AU 96.2 Oceania 885 11484 0.0771 0
## 9 AZ 86 Asia 0 11484 0 0
## 10 BA 75.7 Europe 0 11484 0 33
## # ℹ 169 more rows
## # ℹ 2 more variables: total_ps <dbl>, ps2014 <dbl>
##BTS
network_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ Network_2021,
phi ~ Network_2021,
zi ~ Network_2021),
data = networkdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "network_model_beta_zi_bts"
)
tidy(network_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(network_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.81 1.07 -5.67 -1.31
## 2 fixed cond phi_(Intercept) 3.94 2.26 -1.49 7.20
## 3 fixed zi (Intercept) 5.06 1.10 2.99 7.37
## 4 fixed cond Network_2021 0.00288 0.0122 -0.0252 0.0251
## 5 fixed cond phi_Network_2021 -0.00407 0.0251 -0.0419 0.0566
## 6 fixed zi Network_2021 -0.0504 0.0132 -0.0778 -0.0259
ame_beta_zi_network_bts <- network_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "Network_2021") %>%
marginaleffects::posterior_draws()
ame_beta_zi_network_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2.63 0.623 5.12 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_network_bts, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 2.69 1.19 1.06 4.6 77.43 0.99 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_network_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Internet penetration rate", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
network_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ Network_2021,
phi ~ Network_2021,
zi ~ Network_2021),
data = networkdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "network_model_beta_zi_ps2014"
)
tidy(network_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(network_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) 5.09 3.02 -0.872 10.8
## 2 fixed cond phi_(Intercept) -7.55 3.46 -14.2 -0.743
## 3 fixed zi (Intercept) 18.2 8.52 7.64 40.6
## 4 fixed cond Network_2021 -0.104 0.0346 -0.170 -0.0346
## 5 fixed cond phi_Network_2021 0.105 0.0394 0.0266 0.180
## 6 fixed zi Network_2021 -0.246 0.126 -0.581 -0.0947
ame_beta_zi_network_ps2014 <- network_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "Network_2021") %>%
marginaleffects::posterior_draws()
ame_beta_zi_network_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -4.08 -13.5 1.88 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_network_ps2014, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 -4.75 4.01 -12.46 0.5 0.07 0.07
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_network_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Internet penetration rate", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
####1.9_cultural distance
pddata <- read_csv("pddata.csv")
## Rows: 80 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): country, country3, country_map, continent
## dbl (7): pd, bts, total_bts, percentage_sample, ps, total_ps, ps2014
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pddata
## # A tibble: 80 × 11
## country country3 country_map pd continent bts total_bts
## <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 Algeria DZA DZ 0.155 Africa 0 11484
## 2 Andorra AND AD 0.108 Europe 0 11484
## 3 Argentina ARG AR 0.0722 South America 91 11484
## 4 Armenia ARM AM 0.175 Asia 0 11484
## 5 Australia AUS AU 0.0325 Oceania 885 11484
## 6 Azerbaijan AZE AZ 0.205 Asia 0 11484
## 7 Bahrain BHR BH 0.180 Asia 0 11484
## 8 Belarus BLY BY 0.0659 Europe 0 11484
## 9 Brazil BRA BR 0.0698 South America 201 11484
## 10 Bulgaria BGR BG 0.108 Europe 0 11484
## # ℹ 70 more rows
## # ℹ 4 more variables: percentage_sample <dbl>, ps <dbl>, total_ps <dbl>,
## # ps2014 <dbl>
##BTS
pd_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ pd,
phi ~ pd,
zi ~ pd),
data = pddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "pd_model_beta_zi_bts"
)
tidy(pd_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(pd_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.66 0.215 -4.05 -3.21
## 2 fixed cond phi_(Intercept) 2.73 0.506 1.65 3.63
## 3 fixed zi (Intercept) -2.79 0.853 -4.53 -1.23
## 4 fixed cond pd -0.545 0.943 -2.43 1.27
## 5 fixed cond phi_pd 12.4 5.47 1.31 22.9
## 6 fixed zi pd 28.3 7.34 14.8 43.4
ame_beta_zi_pd_bts <- pd_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "pd") %>% marginaleffects::posterior_draws()
ame_beta_zi_pd_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -356. -793. -93.3 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_pd_bts, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 -394.87 195.22 -780.54 -161.38 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_pd_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Cultural distance", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
###PS2014
pd_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ pd,
phi ~ pd,
zi ~ pd),
data = pddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 1000, warmup = 500,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "pd_model_beta_zi_ps2014"
)
tidy(pd_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(pd_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -6.17 0.527 -7.07 -5.67
## 2 fixed cond phi_(Intercept) 73.2 86.5 -4.24 209.
## 3 fixed zi (Intercept) -36.9 41.1 -105. -2.61
## 4 fixed cond pd 2.45 6.70 -3.34 13.8
## 5 fixed cond phi_pd -299. 499. -1116. 139.
## 6 fixed zi pd 301. 325. 31.0 843.
ame_beta_zi_pd_ps2014 <- pd_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "pd") %>%
marginaleffects::posterior_draws()
ame_beta_zi_pd_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 3 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -42.4 -152. -134. 0.95 median hdi
## 2 -42.4 -117. -116. 0.95 median hdi
## 3 -42.4 -94.0 -0.0753 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_pd_ps2014, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 -59.89 54.94 -142.93 -0.08 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_pd_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Cultural distance", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##1.10Linguistic distance ##lp1
lddata <- read_csv("lddata.csv")
## Rows: 161 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): country_map, continent
## dbl (8): lp1, lp2, bts, total_bts, percentage_sample, ps, total_ps, ps2014
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lddata
## # A tibble: 161 × 10
## country_map continent lp1 lp2 bts total_bts percentage_sample ps
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD Europe 1.99 1.24 0 11484 0 33
## 2 AE Asia NA 0.741 80 11484 0.00697 0
## 3 AF Asia 1.95 0.855 0 11484 0 0
## 4 AG North Ameri… 0.292 0.175 0 11484 0 0
## 5 AI North Ameri… 0.292 0.175 0 11484 0 0
## 6 AL Europe 1.95 0.910 0 11484 0 33
## 7 AM Asia 1.95 0.648 0 11484 0 0
## 8 AO Africa 2.53 1.31 0 11484 0 0
## 9 AR South Ameri… 1.65 0.994 91 11484 0.00793 0
## 10 AT Europe 3.60 2.59 236 11484 0.0206 33
## # ℹ 151 more rows
## # ℹ 2 more variables: total_ps <dbl>, ps2014 <dbl>
##BTS
ld1_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ lp1,
phi ~ lp1,
zi ~ lp1),
data = lddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "ld1_model_beta_zi_bts"
)
tidy(ld1_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(ld1_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.01 0.438 -3.86 -2.13
## 2 fixed cond phi_(Intercept) 2.35 0.662 0.826 3.45
## 3 fixed zi (Intercept) 2.10 0.497 1.16 3.10
## 4 fixed cond lp1 -0.301 0.154 -0.611 -0.00523
## 5 fixed cond phi_lp1 0.786 0.254 0.309 1.31
## 6 fixed zi lp1 -0.694 0.248 -1.18 -0.218
ame_beta_zi_ld1_bts <- ld1_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "lp1") %>%
marginaleffects::posterior_draws()
ame_beta_zi_ld1_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 15.8 -24.4 45.2 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_ld1_bts, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 13.89 17.93 -18.88 38.62 0.21 0.17
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_ld1_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Linguistic distance", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
ld1_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ lp1,
phi ~ lp1,
zi ~ lp1),
data = lddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "ld1_model_beta_zi_ps2014"
)
tidy(ld1_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(ld1_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.58 0.639 -4.80 -2.25
## 2 fixed cond phi_(Intercept) 1.24 0.802 -0.633 2.50
## 3 fixed zi (Intercept) 2.09 0.751 0.542 3.54
## 4 fixed cond lp1 -0.578 0.204 -0.999 -0.198
## 5 fixed cond phi_lp1 1.48 0.248 1.04 2.03
## 6 fixed zi lp1 -0.984 0.340 -1.67 -0.309
ame_beta_zi_ld1_ps2014 <- ld1_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "lp1") %>%
marginaleffects::posterior_draws()
ame_beta_zi_ld1_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 1.29 -40.4 -39.9 0.95 median hdi
## 2 1.29 -34.4 18.6 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_ld1_ps2014, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 -3.24 19.24 -33.55 12.85 0.82 0.45
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_ld1_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Linguistic distance", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##lp2
ld2_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ lp2,
phi ~ lp2,
zi ~ lp2),
data = lddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "ld2_model_beta_zi_bts"
)
tidy(ld2_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(ld2_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.29 0.276 -3.82 -2.74
## 2 fixed cond phi_(Intercept) 3.09 0.414 2.17 3.80
## 3 fixed zi (Intercept) 2.16 0.353 1.50 2.89
## 4 fixed cond lp2 -0.260 0.144 -0.558 0.00663
## 5 fixed cond phi_lp2 0.665 0.234 0.200 1.11
## 6 fixed zi lp2 -1.03 0.293 -1.62 -0.466
ame_beta_zi_ld2_bts <- ld2_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "lp2") %>%
marginaleffects::posterior_draws()
ame_beta_zi_ld2_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 34.8 0.341 65.6 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_ld2_bts, "draw < 0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 34.54 16.3 8.18 60.85 0.02 0.02
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_ld2_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Linguistic distance", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##ld2 ##BTS
ld2_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ lp2,
phi ~ lp2,
zi ~ lp2),
data = lddata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "ld2_model_beta_zi_ps2014"
)
tidy(ld2_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(ld2_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.75 0.535 -4.80 -2.67
## 2 fixed cond phi_(Intercept) 1.09 0.896 -1.02 2.51
## 3 fixed zi (Intercept) 2.54 0.839 0.662 3.86
## 4 fixed cond lp2 -0.693 0.201 -1.10 -0.307
## 5 fixed cond phi_lp2 2.14 0.378 1.49 3.01
## 6 fixed zi lp2 -1.87 0.528 -2.89 -0.829
ame_beta_zi_ld2_ps2014 <- ld2_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "lp2") %>%
marginaleffects::posterior_draws()
ame_beta_zi_ld2_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 20.6 -13.2 46.6 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_ld2_ps2014, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 18.15 21.07 -7.91 38.35 0.08 0.07
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_ld2_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of Linguistic distance", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##_1.11English Proficiency rank
englishdata <- read_csv("englishdata.csv")
## Rows: 116 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): country1, country_map, continent
## dbl (7): EPI_rank, bts, total_bts, percentage_sample, ps, total_ps, ps2014
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
englishdata
## # A tibble: 116 × 10
## country1 EPI_rank country_map continent bts total_bts percentage_sample
## <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 United Arab… 84 AE Asia 80 11484 0.00697
## 2 Afghanistan 92 AF Asia 0 11484 0
## 3 Albania 52 AL Europe 0 11484 0
## 4 Armenia 62 AM Asia 0 11484 0
## 5 Angola 110 AO Africa 0 11484 0
## 6 Argentina 35 AR South Am… 91 11484 0.00793
## 7 Austria 8 AT Europe 236 11484 0.0206
## 8 Australia 3 AU Oceania 885 11484 0.0771
## 9 Azerbaijan 97 AZ Asia 0 11484 0
## 10 Bangladesh 71 BD Asia 0 11484 0
## # ℹ 106 more rows
## # ℹ 3 more variables: ps <dbl>, total_ps <dbl>, ps2014 <dbl>
##BTS
english_model_beta_zi_bts <- brms::brm(
bf(bts| trials(total_bts) ~ EPI_rank,
phi ~ EPI_rank,
zi ~ EPI_rank),
data = englishdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "english_model_beta_zi_bts"
)
tidy(english_model_beta_zi_bts, effects = "fixed")
## Warning in tidy.brmsfit(english_model_beta_zi_bts, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) 6.39 16.7 -3.74 35.4
## 2 fixed cond phi_(Intercept) 17.9 67.2 -79.7 106.
## 3 fixed zi (Intercept) 17.7 59.9 -69.3 90.6
## 4 fixed cond EPI_rank -0.123 0.268 -0.585 0.0701
## 5 fixed cond phi_EPI_rank -0.268 1.15 -1.79 1.38
## 6 fixed zi EPI_rank -0.301 1.04 -1.58 1.21
ame_beta_zi_english_bts <- english_model_beta_zi_bts %>%
marginaleffects::avg_comparisons(variables = "EPI_rank") %>%
marginaleffects::posterior_draws()
ame_beta_zi_english_bts %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -2.91 -3.90 -1.38 0.95 median hdi
## 2 -2.91 48.8 64.9 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_english_bts, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 14.1 29.35 -3.14 64.93 3 0.75
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_english_bts, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of English proficiency rank", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##PS2014
english_model_beta_zi_ps2014 <- brms::brm(
bf(ps| trials(total_ps) ~ EPI_rank,
phi ~ EPI_rank,
zi ~ EPI_rank),
data = englishdata,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
# regularising priors
prior = c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 1), class = b)),
# tune the mcmc sampler
control = list(adapt_delta = 0.99),
backend = "cmdstanr",
file = "english_model_beta_zi_ps2014"
)
tidy(english_model_beta_zi_ps2014, effects = "fixed")
## Warning in tidy.brmsfit(english_model_beta_zi_ps2014, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -5.33e+ 0 2.07e+ 0 -7.62e+ 0 -2.47e+ 0
## 2 fixed cond phi_(Intercept) 2.01e+ 1 6.30e+ 1 -6.86e+ 1 1.05e+ 2
## 3 fixed zi (Intercept) -1.76e+11 3.67e+11 -1.22e+12 8.53e+ 1
## 4 fixed cond EPI_rank 8.23e- 2 6.09e- 2 -1.56e- 2 1.42e- 1
## 5 fixed cond phi_EPI_rank -3.27e- 1 1.08e+ 0 -1.77e+ 0 1.19e+ 0
## 6 fixed zi EPI_rank 3.02e+ 9 6.27e+ 9 -1.49e+ 0 2.08e+10
ame_beta_zi_english_ps2014 <- english_model_beta_zi_ps2014 %>%
marginaleffects::avg_comparisons(variables = "EPI_rank") %>%
marginaleffects::posterior_draws()
ame_beta_zi_english_ps2014 %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 45.1 -11.2 109. 0.95 median hdi
## 2 45.1 111. 118. 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_english_ps2014, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 50.65 54.38 -7.25 117.86 1 0.5
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_english_ps2014, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of English proficiency rank", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##GDP_per_capita drawing
combined_data2 <- rbind(
transform(gdpdata, source = "PS2014", value = ps2014),
transform(gdpdata, source = "PSA001", value = percentage_sample)
)
# Create the plot
gdp1 <- ggplot(combined_data2, aes(x = GDP_per_capita, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("GDP per capita") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 150)) +
ylim(c(-0.2, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the label
gdp1 <- gdp1 +
annotate("text", x = 64.3, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 64.3, y = 0.235,
label = expression("US"), size = 6)
gdp1
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
#ggsave(filename = "GDP.jpg", plot = gdp1, device = "jpg", width = 12, height = 12)
#R&D_investment drawing
combined_data3 <- rbind(
transform(rddata, source = "PS2014", value = ps2014),
transform(rddata, source = "PSA001", value = percentage_sample)
)
# Create the plot
RD1 <- ggplot(combined_data3, aes(x = R_D, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("Research expenditure as a share of GDP") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 3.5)) +
ylim(c(-0.14, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the label
RD1 <- RD1 +
annotate("text", x = 2.55, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 2.55, y = 0.235,
label = expression("US"), size = 6)
# Print the plot
RD1
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
#ggsave(filename = "RD.jpg", plot = RD1, device = "jpg", width = 12, height = 12)
##number_of_universities_per_capita drawing
combined_data1 <- rbind(
transform(universitydata, source = "PS2014", value = ps2014),
transform(universitydata, source = "PSA001", value = percentage_sample)
)
# Create the plot
university <- ggplot(combined_data1, aes(x = number_of_universities_per_capita, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science")) +
ylab("Proportion of sample") +
xlab("Number of universities per 100000 people") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 54)) +
ylim(c(-0.10, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the label
university <- university +
annotate("text", x = 5, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 5, y = 0.235,
label = expression("US"), size = 6)
# Print the plot
university
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
#ggsave(filename = "university.jpg", plot = university, device = "jpg", width = 12, height = 12)
##number_of_psychology_researchers_per_capita drawing
combined_data1 <- rbind(
transform(researcherdata, source = "PS2014", value = ps2014),
transform(researcherdata, source = "PSA001", value = percentage_sample)
)
# Create the plot
researcher <- ggplot(combined_data1, aes(x = number_of_psychology_researchers_per_capita, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science")) +
ylab("Proportion of sample") +
xlab("Number of psychology researchers per 100000 people") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 200)) +
ylim(c(-0.14, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the label
researcher <- researcher +
annotate("text", x = 50, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 50, y = 0.235,
label = expression("US"), size = 6)
# Print the plot
researcher
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
ggsave(filename = "researcher.jpg", plot = researcher, device = "jpg", width = 12, height = 12)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
##average_years_of_schooling drawing
combined_data9 <- rbind(
transform(edudata, source = "PS2014", value = ps2014),
transform(edudata, source = "PSA001", value = percentage_sample)
)
# Create the plot
education1 <- ggplot(combined_data9, aes(x = average_years_of_schooling, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("Average years of formal education\nfor individuals aged 15-64") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 15)) +
ylim(c(-0.25, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the annotations
education1 <- education1 +
annotate("text", x = 12.1, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 12.1, y = 0.235,
label = expression("US"), size = 6)
# Print the plot
education1
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
#ggsave(filename = "education.jpg", plot = education1, device = "jpg", width = 12, height = 12)
##Urbanization drawing
combined_data8 <- rbind(
transform(urbandata, source = "PS2014", value = ps2014),
transform(urbandata, source = "PSA001", value = percentage_sample)
)
# Create the plot
UP1 <- ggplot(combined_data8, aes(x = Urbanization, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("Proportion of urban population") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 100)) +
ylim(c(-0.25, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the annotations
UP1 <- UP1 +
annotate("text", x = 75, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 75, y = 0.235,
label = expression("US"), size = 6)
# Print the plot
UP1
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
#ggsave(filename = "UP.jpg", plot = UP1, device = "jpg", width = 12, height = 12)
##Globalization_Index drawing
combined_data4 <- rbind(
transform(gidata, source = "PS2014", value = ps2014),
transform(gidata, source = "PSA001", value = percentage_sample)
)
# Create the plot
GI1 <- ggplot(combined_data4, aes(x = GI, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("Globalization Index") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 100)) +
ylim(c(-0.15, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the annotations
GI1 <- GI1 +
annotate("text", x = 73.8, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 73.8, y = 0.235,
label = expression("US"), size = 6)
# Print the plot
GI1
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
#ggsave(filename = "GI.jpg", plot = GI1, device = "jpg", width = 12, height = 12)
##Internet penetration rate drawing
combined_data5 <- rbind(
transform(networkdata, source = "PS2014", value = ps2014),
transform(networkdata, source = "PSA001", value = percentage_sample)
)
# Create the plot
Network1 <- ggplot(combined_data5, aes(x = Network_2021, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("Proportion of population using the internet") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 100)) +
ylim(c(-0.18, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the annotations
Network1 <- Network1 +
annotate("text", x = 84.3, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 84.3, y = 0.235,
label = expression("US"), size = 6)
Network1
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
# ggsave(filename = "Network.jpg", plot = Network1, device = "jpg", width = 12, height = 12)
##cultural distance drawing
combined_data1 <- rbind(
transform(pddata, source = "PS2014", value = ps2014),
transform(pddata, source = "PSA001", value = percentage_sample)
)
# Create the plot
pd1 <- ggplot(combined_data1, aes(x = pd, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science")) +
ylab("Proportion of sample") +
xlab("Culture distance from United States") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 0.20)) +
ylim(c(-0.30, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the label
pd1 <- pd1 +
annotate("text", x = 0.018, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 0.018, y = 0.235,
label = expression("US"), size = 6)
#turn over X-axis
pd1 <- pd1 + scale_x_reverse()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Print the plot
pd1
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
# ggsave(filename = "PD.jpg", plot = pd1, device = "jpg", width = 12, height = 12)
##Linguistic_distance_1 drawing
combined_data6 <- rbind(
transform(lddata, source = "PS2014", value = ps2014),
transform(lddata, source = "PSA001", value = percentage_sample)
)
# Create the plot
ld1 <- ggplot(combined_data6, aes(x = lp1, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("Linguistic distance from United States\n(from English)") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right",
legend.text = element_text(size = 18),
legend.title = element_text(size = 20)) +
xlim(c(0, 3.5)) +
ylim(c(-0.25, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the label
ld1 <- ld1 +
annotate("text", x = 0.28, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 0.28, y = 0.235,
label = expression("US"), size = 6)
# turn over X-axis
ld1 <- ld1 + scale_x_reverse()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Print the plot
ld1
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 110 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 110 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
#ggsave(filename = "LD.jpg", plot = ld1, device = "jpg", width = 12, height = 12)
##English Proficiency Index rank drawing
combined_data7 <- rbind(
transform(englishdata, source = "PS2014", value = ps2014),
transform(englishdata, source = "PSA001", value = percentage_sample)
)
# Create the plot
EPI1 <- ggplot(combined_data7, aes(x = EPI_rank, y = value, color = source)) +
geom_point(size = 6, alpha = 0.6) +
geom_smooth(method ="lm",size = 2) +
scale_color_brewer(palette = 'Set1') +
scale_color_manual(name = "Data source",
values = c("#377EB8","#E41A1C"),
labels = c("Traditional studies", "Big team science"))+
ylab("Proportion of sample") +
xlab("English Proficiency Index") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1,
legend.position = "right", # 设置图例位置为顶部
legend.text = element_text(size = 18), # 设置图例文本字体大小
legend.title = element_text(size = 20)) +
xlim(c(0, 120)) +
ylim(c(-0.25, 0.5)) +
guides(color = guide_legend(override.aes = list(linetype = c("solid", "dashed"))))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Adding the annotations
EPI1 <- EPI1 +
annotate("text", x = 11.5, y = 0.455,
label = expression("US"), size = 6) +
annotate("text", x = 11.5, y = 0.235,
label = expression("US"), size = 6)
# turn over X-axis
EPI1 <- EPI1 + scale_x_reverse()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Print the plot
EPI1
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save the plot as a JPEG image
# ggsave(filename = "EPI.jpg", plot = EPI1, device = "jpg", width = 12, height = 12)
##Merge the figures
# Arrange 11 graphics horizontally, left and right
combined_plot1 <- gdp1+ RD1+university+ researcher+education1+UP1+GI1+ Network1 + pd1+ld1+EPI1 +
patchwork::plot_layout(ncol = 3) +
patchwork::plot_annotation(tag_levels ="A",
theme = theme(plot.title = element_text(size = 26)))
# Save the plot as a PDF image
ggsave(filename = "Exploratory_analyses.pdf", plot = combined_plot1, device = "pdf", width = 24, height = 20)
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 110 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 110 rows containing missing values (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
######_2.Relation between the proportion of each state’s/province’s sample in big team science (using Ruggeri et al. (2022) as an example) and Urbanization/Internet penetration rate/English Proficiency in Kenya/Nigeria, China, and United States/Netherlands.
##2.1_Urbanization ##2.1.1_KE_urbanization
KE_urbanization <- read_csv("KE_urbanization.csv")
## Rows: 8 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Province
## dbl (5): population, KE_UP_2020, Ruggeri_KE, total_KE, Ruggeri_KE_pro
## num (1): urban populaition
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
KE_urbanization
## # A tibble: 8 × 7
## Province `urban populaition` population KE_UP_2020 Ruggeri_KE total_KE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Central 2177847 5481849. 0.397 5 120
## 2 Coast 1939583 4327983. 0.448 7 120
## 3 Eastern 1025466 6809051. 0.151 2 120
## 4 Nairobi 4395749 4395749 1 90 120
## 5 North Eastern 658442 2487235. 0.265 1 120
## 6 Nyanza 1008284 6259751. 0.161 7 120
## 7 Rift Valley 3082533 12751056. 0.242 5 120
## 8 Western 547521 5024584. 0.109 3 120
## # ℹ 1 more variable: Ruggeri_KE_pro <dbl>
KE_urbanization_model_beta_zi <- brms::brm(
bf(Ruggeri_KE| trials(total_KE) ~ KE_UP_2020,
phi ~ KE_UP_2020,
zi ~ KE_UP_2020),
data = KE_urbanization,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "KE_urbanization_model_beta_zi "
)
tidy(KE_urbanization_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(KE_urbanization_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -4.10 0.496 -4.76 -3.60
## 2 fixed cond phi_(Intercept) 7.83 14.9 -2.52 33.5
## 3 fixed zi (Intercept) -0.183 1.13 -1.69 1.17
## 4 fixed cond KE_UP_2020 3.19 2.39 -0.0234 6.14
## 5 fixed cond phi_KE_UP_2020 55.5 22.8 40.5 95.7
## 6 fixed zi KE_UP_2020 -9.42 7.54 -18.6 1.89
ame_beta_zi_KE_urbanization <- KE_urbanization_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "KE_UP_2020") %>%
marginaleffects::posterior_draws()
ame_beta_zi_KE_urbanization %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 21.5 2.36 2.79 0.95 median hdi
## 2 21.5 43.5 45.0 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_KE_urbanization, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 22.66 15.87 2.43 45.02 Inf 1 *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_KE_urbanization, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of KE_urbanization", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.1.2_CN_urbanization
CN_urbanization <- read_csv("CN_urbanization.csv")
## Rows: 31 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): State
## dbl (4): CN_UP_2020, Ruggeri_CN, total_CN, Ruggeri_CN_pro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
CN_urbanization
## # A tibble: 31 × 5
## State CN_UP_2020 Ruggeri_CN total_CN Ruggeri_CN_pro
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Guizhou 0.532 1 387 0.00258
## 2 Hainan 0.603 1 387 0.00258
## 3 Jiangxi 0.604 1 387 0.00258
## 4 Shaanxi 0.627 1 387 0.00258
## 5 Tianjin 0.847 1 387 0.00258
## 6 Anhui 0.583 2 387 0.00517
## 7 Chongqing 0.695 2 387 0.00517
## 8 Hebei 0.601 2 387 0.00517
## 9 Yunnan 0.500 2 387 0.00517
## 10 Fujian 0.688 3 387 0.00775
## # ℹ 21 more rows
CN_urbanization_model_beta_zi <- brms::brm(
bf(Ruggeri_CN| trials(total_CN) ~ CN_UP_2020,
phi ~ CN_UP_2020,
zi ~ CN_UP_2020),
data = CN_urbanization,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "CN_urbanization_model_beta_zi "
)
tidy(CN_urbanization_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(CN_urbanization_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -2.31 1.41 -3.95 0.0364
## 2 fixed cond phi_(Intercept) 0.689 1.43 -1.22 3.38
## 3 fixed zi (Intercept) -5954. 7862. -25820. 0.666
## 4 fixed cond CN_UP_2020 0.120 0.354 -0.865 0.578
## 5 fixed cond phi_CN_UP_2020 0.568 1.14 -1.91 3.40
## 6 fixed zi CN_UP_2020 2133. 2815. -0.140 9248.
ame_beta_zi_CN_urbanization <- CN_urbanization_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "CN_UP_2020") %>%
marginaleffects::posterior_draws()
ame_beta_zi_CN_urbanization %>% tidybayes::median_hdi(draw)
## # A tibble: 13 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 6.93 -17.9 -17.9 0.95 median hdi
## 2 6.93 -15.9 -14.9 0.95 median hdi
## 3 6.93 -14.7 -14.6 0.95 median hdi
## 4 6.93 -14.4 -13.5 0.95 median hdi
## 5 6.93 -13.2 -13.2 0.95 median hdi
## 6 6.93 -12.9 -12.6 0.95 median hdi
## 7 6.93 -12.0 -11.3 0.95 median hdi
## 8 6.93 -11.1 12.5 0.95 median hdi
## 9 6.93 12.9 13.3 0.95 median hdi
## 10 6.93 14.1 14.8 0.95 median hdi
## 11 6.93 15.7 16.3 0.95 median hdi
## 12 6.93 17.6 17.9 0.95 median hdi
## 13 6.93 20.7 20.9 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_CN_urbanization, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 4.17 6.32 -8.37 8.99 4.96 0.83
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_CN_urbanization, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of CN_urbanization", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.1.3_US_urbanization
US_urbanization <- read_csv("US_urbanization.csv")
## Rows: 51 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (4): US_UP_2020, Ruggeri_US, total_US, Ruggeri_US_pro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
US_urbanization
## # A tibble: 51 × 5
## state US_UP_2020 Ruggeri_US total_US Ruggeri_US_pro
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alabama 0.577 5 369 0.0136
## 2 Alaska 0.649 1 369 0.00271
## 3 Arizona 0.893 4 369 0.0108
## 4 Arkansas 0.555 0 369 0
## 5 California 0.942 14 369 0.0379
## 6 Colorado 0.86 5 369 0.0136
## 7 Connecticut 0.863 2 369 0.00542
## 8 Delaware 0.826 0 369 0
## 9 District of Columbia 1 61 369 0.165
## 10 Florida 0.915 41 369 0.111
## # ℹ 41 more rows
US_urbanization_model_beta_zi <- brms::brm(
bf(Ruggeri_US| trials(total_US) ~ US_UP_2020,
phi ~ US_UP_2020,
zi ~ US_UP_2020),
data = US_urbanization,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 4, seed = 1234,
backend = "cmdstanr",
file = "US_urbanization_model_beta_zi "
)
tidy(US_urbanization_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(US_urbanization_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -6.13 1.20 -8.51 -3.84
## 2 fixed cond phi_(Intercept) 3.59 1.06 1.35 5.50
## 3 fixed zi (Intercept) -6.41 5.97 -20.5 3.31
## 4 fixed cond US_UP_2020 3.07 1.53 0.132 6.02
## 5 fixed cond phi_US_UP_2020 -0.467 1.36 -3.03 2.31
## 6 fixed zi US_UP_2020 4.14 7.49 -10.0 19.7
ame_beta_zi_US_urbanization <- US_urbanization_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "US_UP_2020") %>%
marginaleffects::posterior_draws()
ame_beta_zi_US_urbanization %>% tidybayes::median_hdi(draw)
## # A tibble: 2 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 13.9 -6.65 63.9 0.95 median hdi
## 2 13.9 68.8 71.3 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_US_urbanization, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 19.85 20.83 -1.06 63.29 11.54 0.92
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_US_urbanization, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of US_urbanization", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.2_Internet penetration rate ##2.2.1_NG_internet
NG_internet <- read_csv("NG_internet.csv")
## Rows: 37 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): State
## dbl (7): Orders, Active Internet 2022, Internet_NG_2022, percentage_Internet...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
NG_internet
## # A tibble: 37 × 8
## Orders State `Active Internet 2022` Internet_NG_2022 percentage_Internet_…¹
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 20 Akwa I… 2702281 0.565 20.9
## 2 36 Bayelsa 1096051. 0.458 16.9
## 3 12 Benue 3709942. 0.641 23.7
## 4 26 Cross … 2021284 0.484 17.9
## 5 16 Delta 5453929. 1.03 37.9
## 6 22 Edo 5366544. 1.20 44.4
## 7 31 Ekiti 1407720. 0.420 15.5
## 8 24 Enugu 3032204. 0.690 25.5
## 9 37 Federa… 7320230 2.71 100
## 10 1 Kano 8634113 0.606 22.4
## # ℹ 27 more rows
## # ℹ abbreviated name: ¹percentage_Internet_NG_2022
## # ℹ 3 more variables: Ruggeri_NG <dbl>, total_NG <dbl>, Ruggeri_NG_pro <dbl>
NG_internet_model_beta_zi <- brms::brm(
bf(Ruggeri_NG| trials(total_NG) ~ Internet_NG_2022,
phi ~ Internet_NG_2022,
zi ~ Internet_NG_2022),
data = NG_internet,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "NG_internet_model_beta_zi "
)
tidy(NG_internet_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(NG_internet_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -1.65 1.23 -4.52 0.464
## 2 fixed cond phi_(Intercept) -1.19 1.79 -4.25 2.83
## 3 fixed zi (Intercept) -2.07 3.08 -8.97 3.37
## 4 fixed cond Internet_NG_2022 -1.97 1.50 -4.33 1.78
## 5 fixed cond phi_Internet_NG_2022 3.58 2.44 -2.06 7.57
## 6 fixed zi Internet_NG_2022 -0.932 3.81 -9.79 5.62
ame_beta_zi_NG_internet <- NG_internet_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "Internet_NG_2022") %>%
marginaleffects::posterior_draws()
ame_beta_zi_NG_internet %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -17.8 -79.7 23.5 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_NG_internet, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 -22.9 25.68 -73.02 10.01 6.6 0.87
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_NG_internet, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of NG_internet", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.2.2_CN_internet
CN_internet <- read_csv("CN_internet.csv")
## Rows: 31 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): State
## dbl (4): Internet_CN_2016, Ruggeri_CN, total_CN, Ruggeri_CN_pro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
CN_internet
## # A tibble: 31 × 5
## State Internet_CN_2016 Ruggeri_CN total_CN Ruggeri_CN_pro
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Guizhou 0.432 1 387 0.00258
## 2 Hainan 0.516 1 387 0.00258
## 3 Jiangxi 0.446 1 387 0.00258
## 4 Shaanxi 0.524 1 387 0.00258
## 5 Tianjin 0.646 1 387 0.00258
## 6 Anhui 0.443 2 387 0.00517
## 7 Chongqing 0.516 2 387 0.00517
## 8 Hebei 0.533 2 387 0.00517
## 9 Yunnan 0.399 2 387 0.00517
## 10 Fujian 0.697 3 387 0.00775
## # ℹ 21 more rows
CN_internet_model_beta_zi <- brms::brm(
bf(Ruggeri_CN| trials(total_CN) ~ Internet_CN_2016,
phi ~ Internet_CN_2016,
zi ~ Internet_CN_2016),
data = CN_internet,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "CN_internet_model_beta_zi "
)
tidy(CN_internet_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(CN_internet_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -4.60 1.80 -8.18 -0.957
## 2 fixed cond phi_(Intercept) 1.96 2.16 -2.34 6.11
## 3 fixed zi (Intercept) -0.917 6.02 -13.1 11.4
## 4 fixed cond Internet_CN_2016 2.64 3.12 -3.45 9.03
## 5 fixed cond phi_Internet_CN_2016 -0.191 3.80 -8.11 6.85
## 6 fixed zi Internet_CN_2016 -4.30 11.3 -31.1 15.0
ame_beta_zi_CN_internet <- CN_internet_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "Internet_CN_2016") %>%
marginaleffects::posterior_draws()
ame_beta_zi_CN_internet %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 34.2 -58.3 271. 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_CN_internet, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 56.92 80.69 -29.85 224.44 5.08 0.84
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_CN_internet, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of CN_internet", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.2.3_US_internet
US_internet <- read_csv("US_internet.csv")
## Rows: 51 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (4): Internet_US_2018, Ruggeri_US, total_US, Ruggeri_US_pro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
US_internet
## # A tibble: 51 × 5
## state Internet_US_2018 Ruggeri_US total_US Ruggeri_US_pro
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Arkansas 0.769 0 369 0
## 2 Delaware 0.884 0 369 0
## 3 Georgia 0.837 0 369 0
## 4 Hawaii 0.857 0 369 0
## 5 Louisiana 0.781 0 369 0
## 6 Montana 0.836 0 369 0
## 7 Nevada 0.859 0 369 0
## 8 North Dakota 0.803 0 369 0
## 9 Oregon 0.879 0 369 0
## 10 South Dakota 0.821 0 369 0
## # ℹ 41 more rows
US_internet_model_beta_zi <- brms::brm(
bf(Ruggeri_US| trials(total_US) ~ Internet_US_2018,
phi ~ Internet_US_2018,
zi ~ Internet_US_2018),
data = US_internet,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "US_internet_model_beta_zi "
)
tidy(US_internet_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(US_internet_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -18.8 8.61 -34.4 -1.80
## 2 fixed cond phi_(Intercept) 18.6 15.2 -11.5 46.5
## 3 fixed zi (Intercept) -1.41 28.3 -61.0 49.2
## 4 fixed cond Internet_US_2018 17.7 10.1 -2.36 36.3
## 5 fixed cond phi_Internet_US_2018 -18.1 17.8 -51.1 17.0
## 6 fixed zi Internet_US_2018 -2.60 33.3 -64.1 66.2
ame_beta_zi_US_internet <- US_internet_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "Internet_US_2018") %>%
marginaleffects::posterior_draws()
ame_beta_zi_US_internet %>% tidybayes::median_hdi(draw)
## # A tibble: 5 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 214. -16.6 131. 0.95 median hdi
## 2 214. 143. 153. 0.95 median hdi
## 3 214. 181. 189. 0.95 median hdi
## 4 214. 209. 223. 0.95 median hdi
## 5 214. 239. 369. 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_US_internet, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 185.3 168.89 -0.2 368.98 5.59 0.85
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_US_internet, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of US_internet", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.3_English Proficiency ##2.3.1_NG_EPI
NG_EPI <- read_csv("NG_EPI.csv")
## Rows: 37 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): State, region
## dbl (6): Orders, score, EPI_NG_2022, Ruggeri_NG, total_NG, Ruggeri_NG_pro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
NG_EPI
## # A tibble: 37 × 8
## Orders State region score EPI_NG_2022 Ruggeri_NG total_NG Ruggeri_NG_pro
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 31 Ekiti South… 568 1 0 257 0
## 2 19 Ondo South… 568 1 0 257 0
## 3 12 Benue North… 564 7 0 257 0
## 4 27 Kogi North… 564 7 0 257 0
## 5 33 Kwara North… 564 7 0 257 0
## 6 20 Akwa Ibom Niger… 560 14 0 257 0
## 7 36 Bayelsa Niger… 560 14 0 257 0
## 8 26 Cross Riv… Niger… 560 14 0 257 0
## 9 16 Delta Niger… 560 14 0 257 0
## 10 22 Edo Niger… 560 14 0 257 0
## # ℹ 27 more rows
NG_EPI_model_beta_zi <- brms::brm(
bf(Ruggeri_NG| trials(total_NG) ~ EPI_NG_2022,
phi ~ EPI_NG_2022,
zi ~ EPI_NG_2022),
data = NG_EPI,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "NG_EPI_model_beta_zi "
)
tidy(NG_EPI_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(NG_EPI_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -0.281 4.15 -3.93 6.82
## 2 fixed cond phi_(Intercept) -13.5 24.2 -55.4 1.75
## 3 fixed zi (Intercept) -0.402 4.18 -7.10 6.18
## 4 fixed cond EPI_NG_2022 -0.150 0.200 -0.493 0.0255
## 5 fixed cond phi_EPI_NG_2022 0.864 1.39 -0.00940 3.26
## 6 fixed zi EPI_NG_2022 -0.0991 0.180 -0.378 0.167
ame_beta_zi_NG_EPI <- NG_EPI_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "EPI_NG_2022") %>%
marginaleffects::posterior_draws()
ame_beta_zi_NG_EPI %>% tidybayes::median_hdi(draw)
## # A tibble: 3 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -0.143 -1.48 -1.44 0.95 median hdi
## 2 -0.143 -1.41 -1.40 0.95 median hdi
## 3 -0.143 -1.31 0.362 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_NG_EPI, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 -0.22 0.45 -1.06 0.19 1.71 0.63
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_NG_EPI, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of NG_EPI", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.3.2_CN_EPI
CN_EPI <- read_csv("CN_EPI.csv")
## Rows: 31 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): State
## dbl (5): score, EPI_CN_2022, Ruggeri_CN, total_CN, Ruggeri_CN_pro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
CN_EPI
## # A tibble: 31 × 6
## State score EPI_CN_2022 Ruggeri_CN total_CN Ruggeri_CN_pro
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Beijing 549 1 9 387 0.0233
## 2 Shanghai 549 1 65 387 0.168
## 3 Tianjin 528 3 1 387 0.00258
## 4 Zhejiang 509 4 85 387 0.220
## 5 Shandong 496 5 4 387 0.0103
## 6 Guangdong 492 6 15 387 0.0388
## 7 Hubei 489 7 5 387 0.0129
## 8 Jiangsu 488 8 15 387 0.0388
## 9 Shaanxi 485 9 1 387 0.00258
## 10 Fujian 483 10 3 387 0.00775
## # ℹ 21 more rows
CN_EPI_model_beta_zi <- brms::brm(
bf(Ruggeri_CN| trials(total_CN) ~ EPI_CN_2022,
phi ~ EPI_CN_2022,
zi ~ EPI_CN_2022),
data = CN_EPI,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "CN_EPI_model_beta_zi"
)
tidy(CN_EPI_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(CN_EPI_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -2.73 0.520 -3.75 -1.69
## 2 fixed cond phi_(Intercept) 2.42 0.616 1.11 3.53
## 3 fixed zi (Intercept) -4.56 2.68 -11.1 -0.786
## 4 fixed cond EPI_CN_2022 -0.0295 0.0368 -0.100 0.0453
## 5 fixed cond phi_EPI_CN_2022 -0.0376 0.0372 -0.114 0.0328
## 6 fixed zi EPI_CN_2022 0.0777 0.139 -0.209 0.362
ame_beta_zi_CN_EPI <- CN_EPI_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "EPI_CN_2022") %>%
marginaleffects::posterior_draws()
ame_beta_zi_CN_EPI %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -0.478 -1.73 0.567 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_CN_EPI, "draw<0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) < 0 -0.49 0.56 -1.41 0.37 6.38 0.86
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_CN_EPI, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of CN_EPI", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##2.3.3_NL_EPI
NL_EPI <- read_csv("NL_EPI.csv")
## Rows: 12 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (5): score, EPI_NL_2022, Ruggeri_NL, total_NL, Ruggeri_NL_pro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
NL_EPI
## # A tibble: 12 × 6
## state score EPI_NL_2022 Ruggeri_NL total_NL Ruggeri_NL_pro
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Friesland 662 7 0 120 0
## 2 Limburg 658 9 0 120 0
## 3 Flevoland 645 11 0 120 0
## 4 Zeeland 678 1 1 120 0.00833
## 5 Overijssel 663 6 2 120 0.0167
## 6 Drenthe 629 12 2 120 0.0167
## 7 North Holland 671 4 7 120 0.0583
## 8 Utrecht 676 2 8 120 0.0667
## 9 Gelderland 665 5 8 120 0.0667
## 10 South Holland 671 3 12 120 0.1
## 11 Groningen 648 10 14 120 0.117
## 12 North Brabant 660 8 66 120 0.55
NL_EPI_model_beta_zi <- brms::brm(
bf(Ruggeri_NL| trials(total_NL) ~ EPI_NL_2022,
phi ~ EPI_NL_2022,
zi ~ EPI_NL_2022),
data = NL_EPI,
family = zero_inflated_beta_binomial(),
chains = 4, iter = 2000, warmup = 1000,
cores = 2, seed = 1234,
backend = "cmdstanr",
file = "NL_EPI_model_beta_zi "
)
tidy(NL_EPI_model_beta_zi, effects = "fixed")
## Warning in tidy.brmsfit(NL_EPI_model_beta_zi, effects = "fixed"): some
## parameter names contain underscores: term naming may be unreliable!
## # A tibble: 6 × 7
## effect component term estimate std.error conf.low conf.high
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fixed cond (Intercept) -3.28 0.812 -4.62 -1.25
## 2 fixed cond phi_(Intercept) 4.66 1.89 0.705 8.40
## 3 fixed zi (Intercept) -3.84 2.75 -10.2 0.492
## 4 fixed cond EPI_NL_2022 0.193 0.143 -0.123 0.450
## 5 fixed cond phi_EPI_NL_2022 -0.464 0.248 -0.955 0.0391
## 6 fixed zi EPI_NL_2022 0.225 0.385 -0.575 0.960
ame_beta_zi_NL_EPI <- NL_EPI_model_beta_zi %>%
marginaleffects::avg_comparisons(variables = "EPI_NL_2022") %>%
marginaleffects::posterior_draws()
ame_beta_zi_NL_EPI %>% tidybayes::median_hdi(draw)
## # A tibble: 1 × 6
## draw .lower .upper .width .point .interval
## <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 1.02 -1.38 5.63 0.95 median hdi
result <- brms::hypothesis(ame_beta_zi_NL_EPI, "draw>0")
result
## Hypothesis Tests for class :
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 (draw) > 0 1.35 1.78 -0.81 4.74 4.12 0.8
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
ggplot(ame_beta_zi_NL_EPI, aes(x = draw)) +
stat_halfeye(.width = c(0.8, 0.95), point_interval = "median_hdi",
fill = "#bc3032") +
scale_x_continuous(labels = label_pp) +
labs(x = "Average marginal effect of NL_EPI", y = NULL,
caption = "80% and 95% credible intervals shown in black") +
theme_clean()
##Kenya drawing (Urbanization)
model <- lm(Ruggeri_KE_pro ~ KE_UP_2020, data = KE_urbanization)
predict_data_KE <- data.frame(KE_UP_2020 = KE_urbanization$KE_UP_2020, Ruggeri_KE_pro = KE_urbanization$Ruggeri_KE_pro )
predict_data_KE$predicted <- predict(model, newdata = predict_data_KE)
predict_data_KE$lower <- predict(model, newdata = KE_urbanization, interval = "confidence")[, "lwr"]
predict_data_KE$upper <- predict(model, newdata = KE_urbanization, interval = "confidence")[, "upr"]
KE_UP <- ggplot(data = KE_urbanization, aes(y = Ruggeri_KE_pro, x = KE_UP_2020, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_KE, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_KE, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("Proportion of urban population\nby province in Kenya") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0, 1)) +
ylim(c(-0.23, 0.9)) +
guides(color = FALSE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
KE_UP
# Save the plot as a pdf image
#ggsave(filename = " KE_UP.pdf", plot = KE_UP, device = "jpg", width = 12, height = 12)
##China drawing (Urbanization)
model <- lm(Ruggeri_CN_pro ~ CN_UP_2020, data = CN_urbanization)
predict_data_CN <- data.frame(CN_UP_2020= CN_urbanization$CN_UP_2020, Ruggeri_CN_pro = CN_urbanization$Ruggeri_CN_pro )
predict_data_CN$predicted <- predict(model, newdata = predict_data_CN)
predict_data_CN$lower <- predict(model, newdata = CN_urbanization, interval = "confidence")[, "lwr"]
predict_data_CN$upper <- predict(model, newdata = CN_urbanization, interval = "confidence")[, "upr"]
CN_UP <- ggplot(data = CN_urbanization, aes(y = Ruggeri_CN_pro, x = CN_UP_2020, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_CN, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_CN, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("Proportion of urban population\nby province in China") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0, 1)) +
ylim(c(-0.03, 0.2)) +
guides(color = FALSE)
CN_UP
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
# Save the plot as a pdf image
# ggsave(filename = "CN_UP.jpg", plot = CN_UP, device = "pdf", width = 12, height = 12)
##the United States drawing (Urbanization)
model <- lm(Ruggeri_US_pro ~ US_UP_2020, data = US_urbanization)
predict_data_US <- data.frame(US_UP_2020= US_urbanization$US_UP_2020, Ruggeri_US_pro = US_urbanization$Ruggeri_US_pro )
predict_data_US$predicted <- predict(model, newdata = predict_data_US)
predict_data_US$lower <- predict(model, newdata = US_urbanization, interval = "confidence")[, "lwr"]
predict_data_US$upper <- predict(model, newdata = US_urbanization, interval = "confidence")[, "upr"]
US_UP <- ggplot(data = US_urbanization, aes(y = Ruggeri_US_pro, x = US_UP_2020, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_US, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_US, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("Proportion of urban population\nby state in United States") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0, 1)) +
ylim(c(-0.036, 0.2)) +
guides(color = FALSE)
US_UP
# Save the plot as a pdf image
# ggsave(filename = " US_UP.jpg", plot = US_UP, device = "pdf", width = 12, height = 12)
##Nigeria drawing (Internet penetration rate)
# 拟合线性回归模型并计算置信区间
model <- lm(Ruggeri_NG_pro ~ Internet_NG_2022, data = NG_internet)
predict_data_NG <- data.frame(Internet_NG_2022 = NG_internet$Internet_NG_2022, Ruggeri_NG_pro = NG_internet$Ruggeri_NG_pro )
predict_data_NG$predicted <- predict(model, newdata = predict_data_NG)
predict_data_NG$lower <- predict(model, newdata = NG_internet, interval = "confidence")[, "lwr"]
predict_data_NG$upper <- predict(model, newdata = NG_internet, interval = "confidence")[, "upr"]
# 绘制散点图和回归线
NG_IPR <- ggplot(data = NG_internet, aes(y = Ruggeri_NG_pro, x = Internet_NG_2022, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_NG, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_NG, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("Proportion of active Internet\nby state in Nigeria") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0, 3)) +
ylim(c(-0.18, 0.7)) +
guides(color = FALSE)
NG_IPR
# Save the plot as a JPEG image
ggsave(filename = "NG_IPR.jpg", plot = NG_IPR, device = "jpg", width = 12, height = 12)
##China drawing (Internet penetration rate)
# 拟合线性回归模型并计算置信区间
model <- lm(Ruggeri_CN_pro ~ Internet_CN_2016, data = CN_internet)
predict_data_CN <- data.frame(Internet_CN_2016= CN_internet$Internet_CN_2016, Ruggeri_CN_pro = CN_internet$Ruggeri_CN_pro )
predict_data_CN$predicted <- predict(model, newdata = predict_data_CN)
predict_data_CN$lower <- predict(model, newdata = CN_internet, interval = "confidence")[, "lwr"]
predict_data_CN$upper <- predict(model, newdata = CN_internet, interval = "confidence")[, "upr"]
# 绘制散点图和回归线
CN_IPR <- ggplot(data = CN_internet, aes(y = Ruggeri_CN_pro, x = Internet_CN_2016, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_CN, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_CN, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("Proportion of population using the internet\nby province in china") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0.38, 1)) +
ylim(c(-0.03, 0.3)) +
guides(color = FALSE)
CN_IPR
# Save the plot as a JPEG image
#ggsave(filename = "CN_IPR.jpg", plot = CN_IPR, device = "jpg", width = 12, height = 12)
##the United States drawing (Internet penetration rate)
model <- lm(Ruggeri_US_pro ~ Internet_US_2018, data = US_internet)
predict_data_US <- data.frame(Internet_US_2018= US_internet$Internet_US_2018, Ruggeri_US_pro = US_internet$Ruggeri_US_pro )
predict_data_US$predicted <- predict(model, newdata = predict_data_US)
predict_data_US$lower <- predict(model, newdata = US_internet, interval = "confidence")[, "lwr"]
predict_data_US$upper <- predict(model, newdata = US_internet, interval = "confidence")[, "upr"]
US_IPR <- ggplot(data = US_internet, aes(y = Ruggeri_US_pro, x = Internet_US_2018, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_US, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_US, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("Proportion of population using the internet\nby state in United States") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0.38, 1)) +
ylim(c(-0.03, 0.2)) +
guides(color = FALSE)
US_IPR
# Save the plot as a JPEG image
#ggsave(filename = "US_IPR.jpg", plot = US_IPR, device = "jpg", width = 12, height = 12)
##Nigeria drawing (English Proficiency)
model <- lm(Ruggeri_NG_pro ~ EPI_NG_2022, data = NG_EPI)
predict_data_NG <- data.frame(EPI_NG_2022 = NG_EPI$EPI_NG_2022, Ruggeri_NG_pro = NG_EPI$Ruggeri_NG_pro )
predict_data_NG$predicted <- predict(model, newdata = predict_data_NG)
predict_data_NG$lower <- predict(model, newdata = NG_EPI, interval = "confidence")[, "lwr"]
predict_data_NG$upper <- predict(model, newdata = NG_EPI, interval = "confidence")[, "upr"]
NG_EPI <- ggplot(data = NG_EPI, aes(y = Ruggeri_NG_pro, x = EPI_NG_2022, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_NG, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_NG, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("English Proficiency of Nigeria by state") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(470, 670)) +
ylim(c(-0.13, 0.67)) +
guides(color = FALSE)
#turn over X-axis
NG_EPI <- NG_EPI + scale_x_reverse()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
NG_EPI
# Save the plot as a JPEG image
# ggsave(filename = " NG_EPI.jpg", plot = NG_EPI, device = "jpg", width = 12, height = 12)
##China drawing (English Proficiency)
model <- lm(Ruggeri_CN_pro ~ EPI_CN_2022, data = CN_EPI)
predict_data_CN <- data.frame(EPI_CN_2022 = CN_EPI$EPI_CN_2022, Ruggeri_CN_pro = CN_EPI$Ruggeri_CN_pro )
predict_data_CN$predicted <- predict(model, newdata = predict_data_CN)
predict_data_CN$lower <- predict(model, newdata = CN_EPI, interval = "confidence")[, "lwr"]
predict_data_CN$upper <- predict(model, newdata = CN_EPI, interval = "confidence")[, "upr"]
CN_EPI <- ggplot(data = CN_EPI, aes(y = Ruggeri_CN_pro, x = EPI_CN_2022, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_CN, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_CN, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("English Proficiency of China by province") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0, 32)) +
ylim(c(-0.13, 0.67)) +
guides(color = FALSE)
#turn over X-axis
CN_EPI <- CN_EPI + scale_x_reverse()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
CN_EPI
# Save the plot as a JPEG image
ggsave(filename = " CN_EPI.jpg", plot = CN_EPI, device = "jpg", width = 12, height = 12)
##Netherlands drawing (English Proficiency)
model <- lm(Ruggeri_NL_pro ~ EPI_NL_2022, data = NL_EPI)
predict_data_NL <- data.frame(EPI_NL_2022 = NL_EPI$EPI_NL_2022, Ruggeri_NL_pro = NL_EPI$Ruggeri_NL_pro )
predict_data_NL$predicted <- predict(model, newdata = predict_data_NL)
predict_data_NL$lower <- predict(model, newdata = NL_EPI, interval = "confidence")[, "lwr"]
predict_data_NL$upper <- predict(model, newdata = NL_EPI, interval = "confidence")[, "upr"]
NL_EPI <- ggplot(data = NL_EPI, aes(y = Ruggeri_NL_pro, x = EPI_NL_2022, color = "#E41A1C")) +
geom_point(stat = "identity", position = "identity", size = 6) +
geom_line(data = predict_data_NL, aes(y = predicted), color = "#E41A1C", size = 2) +
geom_ribbon(data = predict_data_NL, aes(ymin = lower, ymax = upper), fill = "black", alpha = 0.3, linetype = "blank") +
ylab("Proportion of sample in big team science") +
xlab("English Proficiency of Netherlands by state") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 24),
panel.background = element_rect(fill = "white"),
axis.line = element_line(color = "black"),
aspect.ratio = 1) +
xlim(c(0, 32)) +
ylim(c(-0.18, 0.67)) +
guides(color = FALSE)
#turn over X-axis
NL_EPI <- NL_EPI + scale_x_reverse()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
NL_EPI
# Save the plot as a JPEG image
ggsave(filename = " NL_EPI.jpg", plot = NL_EPI, device = "jpg", width = 12, height = 12)
# Arrange 9 graphics horizontally, left and right
combined_plot2 <- KE_UP + CN_UP + US_UP + NG_IPR + CN_IPR + US_IPR + NG_EPI + CN_EPI + NL_EPI+
patchwork::plot_layout(ncol = 3) +
patchwork::plot_annotation(tag_levels ="A",
theme = theme(plot.title = element_text(size = 26)))
combined_plot2
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
# Save the plot as a pdf image
ggsave(filename = "Exploratory_analyses_by_state.pdf", plot = combined_plot2, device = "pdf", width = 24, height = 24)
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Removed 1 row containing missing values (`geom_line()`).
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.